Reads were first de-multiplexed with ea-utils (fastq-multx) with an allowed barcode mismatch of 1 (Default). De-multiplexed reads were then subjected to an alignment against the UniVec vector contamination database using the adapter removal tool within ea-utils (fastq-mcf). Barcodes were extracted by remove the last 12 nucleotides from each read.
Next reads were subjected to an alignment against the Oligo library using a DNA aligner. We used Bowtie2…
We are going to first import the merged counts tables that is outputted from the Nextflow pipeline. This table already has the raw counts summarised from the alignments and merged into a single matrix.
# Import counts
counts = readRDS('../data/CD8-counts.rda')
head(counts)
## cdk453_a cdk453_b d1_10_a d1_10_b d1_100_a d1_100_b d1_1000_a d1_1000_b
## oligo_1 1551 1612 2369 2561 2569 2559 4852 2483
## oligo_2 4038 4485 7276 6028 7360 6630 10887 6771
## oligo_3 1898 2014 2751 2228 3160 2711 4475 2407
## oligo_4 2351 2285 3168 3149 3758 3361 5825 3646
## oligo_5 3251 3546 4672 4928 5789 5114 8571 4926
## oligo_6 2132 2011 3146 2560 3108 2734 4388 2809
## d1_300_a d1_300_b id3_a id3_b mock_a mock_b
## oligo_1 2953 2841 2709 2110 4105 3251
## oligo_2 7919 8438 7396 6572 12822 8352
## oligo_3 3062 3386 3042 2084 4337 3272
## oligo_4 3659 4049 3770 3303 5496 3992
## oligo_5 5662 6434 5276 4044 7890 5831
## oligo_6 3122 3320 3382 2682 5531 3180
# Import metadata
metadata = readRDS('../data/CD8-metadata.rda')
head(metadata)
## Sample Sample name Group Index barcode
## mock_a mock_a mock A Mock GGAATTC
## mock_b mock_b mock B Mock TTAGTGG
## cdk453_a cdk453_a #53 A CDK4 ACAACGA
## cdk453_b cdk453_b #53 B CDK4 ACCAATG
## id3_a id3_a 1D3 A ID3 CAACAGC
## id3_b id3_b 1D3 B ID3 CACACGT
# Import MultiQC results
report = readRDS('../data/CD8-multiqc.rda')
head(report)
## # A tibble: 6 × 47
## Sample raw_total_seque… filtered_sequen… sequences is_sorted `1st_fragments`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 cdk453_… 8434046 0 8434046 1 8434046
## 2 cdk453_… 8685727 0 8685727 1 8685727
## 3 d1_1000… 21558627 0 21558627 1 21558627
## 4 d1_1000… 12555470 0 12555470 1 12555470
## 5 d1_100_… 14081247 0 14081247 1 14081247
## 6 d1_100_… 12753978 0 12753978 1 12753978
## # … with 41 more variables: last_fragments <dbl>, reads_mapped <dbl>,
## # reads_mapped_and_paired <dbl>, reads_unmapped <dbl>,
## # reads_properly_paired <dbl>, reads_paired <dbl>, reads_duplicated <dbl>,
## # reads_MQ0 <dbl>, reads_QC_failed <dbl>, `non-primary_alignments` <dbl>,
## # total_length <dbl>, total_first_fragment_length <dbl>,
## # total_last_fragment_length <dbl>, bases_mapped <dbl>,
## # `bases_mapped_(cigar)` <dbl>, bases_trimmed <dbl>, …
# Import oligo annotations
annotations = readRDS('../data/CD8-annotations.rda')
head(annotations)
## # A tibble: 6 × 3
## oligo_id gene_name gene_id
## <chr> <chr> <chr>
## 1 oligo_1 MAGEA1 MAGEA1 #1
## 2 oligo_2 gene1394 gene1394 #1
## 3 oligo_3 gene2221 gene2221 #1
## 4 oligo_4 gene1715 gene1715 #1
## 5 oligo_5 gene1014 gene1014 #1
## 6 oligo_6 gene500 gene500 #1
# Mean
mean(report$raw_total_sequences)
## [1] 14162045
median(report$raw_total_sequences)
## [1] 13417612
range(report$raw_total_sequences)
## [1] 8434046 23626015
p1 = report %>%
select(Sample, reads_unmapped, reads_mapped) %>%
mutate(Sample = str_replace(Sample, '-bamstats', '')) %>%
pivot_longer(cols = c("reads_unmapped", "reads_mapped"), names_to = "feature", values_to = "reads") %>%
mutate(feature = if_else(feature == "reads_unmapped", "Unmapped", feature)) %>%
mutate(feature = if_else(feature == "reads_mapped", "Mapped", feature)) %>%
ggplot(aes(x = reorder(Sample, reads), y = reads, fill = feature)) +
geom_col() +
coord_flip() +
theme_classic(base_size = 10) +
theme(legend.position = "top") +
scale_fill_brewer(palette = "Set2") +
xlab("") +
ylab("Number of reads") +
ggtitle("CD8: mapping rate")
p1
### Percent of mapped reads
p2 = report %>%
select(Sample, reads_unmapped_percent, reads_mapped_percent ) %>%
mutate(Sample = str_replace(Sample, '-bamstats', '')) %>%
pivot_longer(cols = c("reads_unmapped_percent", "reads_mapped_percent"), names_to = "feature", values_to = "reads") %>%
mutate(reads = reads / 100) %>%
mutate(feature = if_else(feature == "reads_unmapped_percent", "Unmapped", feature)) %>%
mutate(feature = if_else(feature == "reads_mapped_percent", "Mapped", feature)) %>%
ggplot(aes(x = reorder(Sample, reads), y = reads, fill = feature)) +
geom_col() +
coord_flip() +
theme_classic(base_size = 10) +
theme(legend.position = "top") +
scale_fill_brewer(palette = "Set2") +
scale_y_continuous(labels = scales::percent) +
xlab("") +
ylab("Percent of reads")
p2
# Plot together
p = (p1 | p2) + plot_layout(guides = "collect") & theme(legend.position = "bottom")
p
ggsave("figures/Benchmark-CD8-mapping-qc.pdf", p, width = 8, height = 5)
# Match the sample ID's between samples
counts = counts[,match(metadata$Sample, colnames(counts))]
# Make DESeq2 object
ddsMat <- DESeqDataSetFromMatrix(countData = counts,
colData = metadata,
design = ~Group)
# Get normalize counts
ddsMat <- estimateSizeFactors(ddsMat)
# Filter flow counts
ddsMat.fil = ddsMat %>%
filter_low(percentile = 0.05)
# Get normalized counts
counts.norm.fil <- (counts(ddsMat.fil, normalized = TRUE) + 0.5) %>%
as.data.frame() %>%
rownames_to_column("oligo_id")
# Create a long table
counts.norm.fil.long = counts.norm.fil %>%
pivot_longer(cols = -oligo_id, names_to = "sampleId", values_to = "norm")
counts.norm.fil %>%
ggplot(., aes(x = log10(mock_a), y = log10(mock_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("Mock replicate #1 (log10)") +
ylab("Mock replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
counts.norm.fil %>%
ggplot(aes(x = log10(id3_a), y = log10(id3_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1D3 replicate #1 (log10)") +
ylab("1D3 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
counts.norm.fil %>%
ggplot(aes(x = log10(cdk453_a), y = log10(cdk453_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("CDK4/53 replicate #1 (log10)") +
ylab("CDK4/53 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
counts.norm.fil %>%
ggplot(aes(x = log10(d1_10_a), y = log10(d1_10_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1:10 replicate #1 (log10)") +
ylab("1:10 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
counts.norm.fil %>%
ggplot(aes(x = log10(d1_100_a), y = log10(d1_100_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1:100 replicate #1 (log10)") +
ylab("1:100 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
counts.norm.fil %>%
ggplot(aes(x = log10(d1_300_a), y = log10(d1_300_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1:300 replicate #1 (log10)") +
ylab("1:300 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
counts.norm.fil %>%
ggplot(aes(x = log10(d1_1000_a), y = log10(d1_1000_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1:1000 replicate #1 (log10)") +
ylab("1:1000 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
counts.norm.fil.long %>%
ggplot(aes(x = log10(norm))) +
geom_histogram(aes(y=..density..), binwidth=0.25, colour="black", fill="white") +
geom_density(alpha = 0.2, fill = "#FF6666") +
facet_wrap(sampleId~.) +
theme_light(base_size = 11) +
ylab("Density") +
xlab("Normalized reads per oligo (log10)")
mart1_mock.res = compute_stats(ddsMat.fil, trt = "ID3", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>%
left_join(annotations) %>%
mutate(log2FoldChange = round(log2FoldChange, 2)) %>%
mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
##
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
##
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up) : 0, 0%
## LFC < -0.25 (down) : 3, 0.066%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 330)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
mart1_mock.res %>%
filter(padj < 0.05)
## oligo_id baseMean log2FoldChange lfcSE stat pvalue
## 1 MART1_b 2308.170 -1.17 0.1123802 -8.208217 1.122482e-16
## 2 MART1_ELA_a 1575.126 -2.59 0.1664753 -14.031382 5.008980e-45
## 3 MART1_ELA_b 1709.922 -4.11 0.1546682 -24.974480 5.789449e-138
## padj log2FoldChangeShrunken gene_name gene_id
## 1 1.693077e-13 -0.57 gene812 gene812 #2
## 2 1.133282e-41 -0.73 MART1_ELA MART1_ELA #1
## 3 2.619725e-134 -1.28 MART1_ELA MART1_ELA #2
# Browse
mart1_mock.res %>%
select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>%
DT::datatable(extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
))
# Plot MA-plot
mart1_mock.maplot = mart1_mock.res %>%
ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(data = filter(mart1_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
geom_point(data = filter(mart1_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(mart1_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(mart1_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
geom_text_repel(data = filter(mart1_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
theme_classic() +
theme_classic() +
xlab("Mean counts (log2)") +
ylab("Fold change log2(MART1/Mock)") +
ggtitle("MART1")
ggplotly(mart1_mock.maplot)
d10_mock.res = compute_stats(ddsMat.fil, trt = "D10", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>%
left_join(annotations) %>%
mutate(log2FoldChange = round(log2FoldChange, 2)) %>%
mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) %>%
mutate(gene_name = if_else(gene_name == "RPS3Amut", "SNORDmut", gene_name)) %>%
mutate(gene_name = if_else(gene_name == "RPS3Awt", "SNORDwt", gene_name))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
##
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
##
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up) : 0, 0%
## LFC < -0.25 (down) : 5, 0.11%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 321)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
d10_mock.res %>%
filter(padj < 0.05)
## oligo_id baseMean log2FoldChange lfcSE stat pvalue
## 1 CDK4mut_a 772.9757 -2.59 0.2186967 -10.712447 4.448893e-27
## 2 CDK4mut_b 596.6486 -4.47 0.2832977 -14.898259 1.691392e-50
## 3 MART1_b 2548.5431 -0.75 0.1163850 -4.313486 8.035024e-06
## 4 MART1_ELA_a 1579.6401 -2.55 0.1577153 -14.601475 1.374162e-48
## 5 MART1_ELA_b 1695.6118 -4.35 0.1653377 -24.768457 9.808023e-136
## padj log2FoldChangeShrunken gene_name gene_id
## 1 5.032810e-24 -0.49 CDK4mut CDK4mut #1
## 2 3.826774e-47 -0.50 CDK4mut CDK4mut #2
## 3 7.271696e-03 -0.36 gene812 gene812 #2
## 4 2.072695e-45 -0.81 MART1_ELA MART1_ELA #1
## 5 4.438130e-132 -1.25 MART1_ELA MART1_ELA #2
# Browse
d10_mock.res %>%
select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>%
DT::datatable(extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
))
# Plot MA-plot
d10_mock.maplot = d10_mock.res %>%
ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(data = filter(d10_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
geom_point(data = filter(d10_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(d10_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(d10_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
geom_text_repel(data = filter(d10_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
theme_classic() +
theme_classic() +
xlab("Mean counts (log2)") +
ylab("Fold change log2(D10/Mock)")
ggplotly(d10_mock.maplot)
d100_mock.res = compute_stats(ddsMat.fil, trt = "D100", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>%
left_join(annotations) %>%
mutate(log2FoldChange = round(log2FoldChange, 2)) %>%
mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) %>%
mutate(gene_name = if_else(gene_name == "RPS3Amut", "SNORDmut", gene_name)) %>%
mutate(gene_name = if_else(gene_name == "RPS3Awt", "SNORDwt", gene_name))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
##
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
##
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up) : 0, 0%
## LFC < -0.25 (down) : 4, 0.088%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 311)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
d100_mock.res %>%
filter(padj < 0.05)
## oligo_id baseMean log2FoldChange lfcSE stat pvalue
## 1 CDK4mut_a 813.4943 -2.14 0.1875386 -10.093039 2.965222e-24
## 2 CDK4mut_b 690.1380 -2.26 0.2605368 -7.721243 5.760017e-15
## 3 MART1_ELA_a 1642.6568 -2.20 0.1250384 -15.620545 2.637637e-55
## 4 MART1_ELA_b 1871.9163 -2.66 0.1137217 -21.188122 6.145161e-100
## padj log2FoldChangeShrunken gene_name gene_id
## 1 4.472543e-21 -0.53 CDK4mut CDK4mut #1
## 2 6.516020e-12 -0.32 CDK4mut CDK4mut #2
## 3 5.967654e-52 -0.96 MART1_ELA MART1_ELA #1
## 4 2.780685e-96 -1.28 MART1_ELA MART1_ELA #2
# Browse
d100_mock.res %>%
select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>%
DT::datatable(extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
))
# Plot MA-plot
d100_mock.maplot = d100_mock.res %>%
ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(data = filter(d100_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
geom_point(data = filter(d100_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(d100_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(d100_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
geom_text_repel(data = filter(d100_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
theme_classic() +
theme_classic() +
xlab("Mean counts (log2)") +
ylab("Fold change log2(D100/Mock)")
ggplotly(d100_mock.maplot)
d300_mock.res = compute_stats(ddsMat.fil, trt = "D300", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>%
left_join(annotations) %>%
mutate(log2FoldChange = round(log2FoldChange, 2)) %>%
mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) %>%
mutate(gene_name = if_else(gene_name == "RPS3Amut", "SNORDmut", gene_name)) %>%
mutate(gene_name = if_else(gene_name == "RPS3Awt", "SNORDwt", gene_name))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
##
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
##
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up) : 0, 0%
## LFC < -0.25 (down) : 2, 0.044%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 338)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
d300_mock.res %>%
filter(padj < 0.05)
## oligo_id baseMean log2FoldChange lfcSE stat pvalue
## 1 MART1_ELA_a 1993.798 -1.07 0.1143751 -7.143311 4.555459e-13
## 2 MART1_ELA_b 2299.487 -1.24 0.1001225 -9.906465 1.951041e-23
## padj log2FoldChangeShrunken gene_name gene_id
## 1 1.030673e-09 -0.52 MART1_ELA MART1_ELA #1
## 2 8.828460e-20 -0.69 MART1_ELA MART1_ELA #2
# Browse
d300_mock.res %>%
select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>%
DT::datatable(extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
))
# Plot MA-plot
d300_mock.maplot = d300_mock.res %>%
ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(data = filter(d300_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
geom_point(data = filter(d300_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(d300_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(d300_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
geom_text_repel(data = filter(d300_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
theme_classic() +
theme_classic() +
xlab("Mean counts (log2)") +
ylab("Fold change log2(D300/Mock)")
ggplotly(d300_mock.maplot)
d1000_mock.res = compute_stats(ddsMat.fil, trt = "D1000", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>%
left_join(annotations) %>%
mutate(log2FoldChange = round(log2FoldChange, 2)) %>%
mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) %>%
mutate(gene_name = if_else(gene_name == "RPS3Amut", "SNORDmut", gene_name)) %>%
mutate(gene_name = if_else(gene_name == "RPS3Awt", "SNORDwt", gene_name))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
##
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
##
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up) : 0, 0%
## LFC < -0.25 (down) : 0, 0%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 340)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
d1000_mock.res %>%
filter(padj < 0.05)
## [1] oligo_id baseMean log2FoldChange
## [4] lfcSE stat pvalue
## [7] padj log2FoldChangeShrunken gene_name
## [10] gene_id
## <0 rows> (or 0-length row.names)
# Browse
d1000_mock.res %>%
select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>%
DT::datatable(extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
))
# Plot MA-plot
d1000_mock.maplot = d1000_mock.res %>%
ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(data = filter(d1000_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
geom_point(data = filter(d1000_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(d1000_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
geom_point(data = filter(d1000_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
geom_text_repel(data = filter(d1000_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
theme_classic() +
xlab("Mean counts (log2)") +
ylab("Fold change log2(D1000/Mock)")
ggplotly(d1000_mock.maplot)
cdk454_mock.res = compute_stats(ddsMat.fil, trt = "CDK4", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>%
left_join(annotations) %>%
mutate(log2FoldChange = round(log2FoldChange, 2)) %>%
mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
##
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
##
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up) : 0, 0%
## LFC < -0.25 (down) : 3, 0.066%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 327)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
cdk454_mock.res %>%
filter(padj < 0.05)
## oligo_id baseMean log2FoldChange lfcSE stat pvalue
## 1 CDK4mut_a 747.0887 -2.98 0.1915627 -14.255008 2.086284e-46
## 2 CDK4mut_b 597.9877 -4.40 0.5844750 -7.094939 6.470425e-13
## 3 oligo_4374 1881.8200 -0.67 0.1050117 -4.025151 2.846933e-05
## padj log2FoldChangeShrunken gene_name gene_id
## 1 9.440434e-43 -0.73 CDK4mut CDK4mut #1
## 2 1.463934e-09 -0.10 CDK4mut CDK4mut #2
## 3 4.294124e-02 -0.36 gene2105 gene2105 #2
# Browse
cdk454_mock.res %>%
select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>%
DT::datatable(extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
))
# Plot MA-plot
cdk453_mock.maplot = cdk454_mock.res %>%
ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(colour = alpha("red", 0.7), data = . %>% filter(oligo_id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(oligo_id %in% c("MART1_a", "MART1_b"))) +
geom_point(colour = alpha("orange", 0.7), data = . %>% filter(oligo_id %in% c("CDK4wt_a", "CDK4wt_b"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(oligo_id %in% c("CDK4mut_a", "CDK4mut_b"))) +
geom_text_repel(data = filter(cdk454_mock.res, oligo_id %in% c("CDK4wt_a", "CDK4wt_b", "CDK4mut_a", "CDK4mut_b")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
theme_classic() +
theme_classic() +
xlab("Mean counts (log2)") +
ylab("Fold change log2(CDK4 #53/Mock)") +
ggtitle("CDK4 #53")
ggplotly(cdk453_mock.maplot)
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0
## [3] dplyr_1.0.9 purrr_0.3.4
## [5] readr_2.1.2 tidyr_1.2.0
## [7] tibble_3.1.7 tidyverse_1.3.1
## [9] ggpubr_0.4.0 plotly_4.10.0
## [11] ggplotify_0.1.0 readxl_1.4.0
## [13] DESeq2_1.34.0 SummarizedExperiment_1.24.0
## [15] Biobase_2.54.0 MatrixGenerics_1.6.0
## [17] matrixStats_0.62.0 GenomicRanges_1.46.1
## [19] GenomeInfoDb_1.30.1 IRanges_2.28.0
## [21] S4Vectors_0.32.4 BiocGenerics_0.40.0
## [23] ggrepel_0.9.1 ggplot2_3.3.6
## [25] ggsci_2.9 patchwork_1.1.1
## [27] knitr_1.39
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ggsignif_0.6.3 ellipsis_0.3.2
## [4] XVector_0.34.0 fs_1.5.2 rstudioapi_0.13
## [7] farver_2.1.0 DT_0.25 bit64_4.0.5
## [10] AnnotationDbi_1.56.2 fansi_1.0.3 lubridate_1.8.0
## [13] xml2_1.3.3 splines_4.1.3 cachem_1.0.6
## [16] geneplotter_1.72.0 jsonlite_1.8.0 broom_0.8.0
## [19] annotate_1.72.0 dbplyr_2.1.1 png_0.1-7
## [22] BiocManager_1.30.18 compiler_4.1.3 httr_1.4.2
## [25] backports_1.4.1 assertthat_0.2.1 Matrix_1.4-0
## [28] fastmap_1.1.0 lazyeval_0.2.2 cli_3.3.0
## [31] htmltools_0.5.2 tools_4.1.3 gtable_0.3.0
## [34] glue_1.6.2 GenomeInfoDbData_1.2.7 Rcpp_1.0.8.3
## [37] carData_3.0-5 cellranger_1.1.0 jquerylib_0.1.4
## [40] vctrs_0.4.1 Biostrings_2.62.0 crosstalk_1.2.0
## [43] xfun_0.30 rvest_1.0.2 lifecycle_1.0.1
## [46] renv_0.15.4 rstatix_0.7.0 XML_3.99-0.10
## [49] zlibbioc_1.40.0 scales_1.2.0 hms_1.1.1
## [52] parallel_4.1.3 RColorBrewer_1.1-3 yaml_2.3.5
## [55] memoise_2.0.1 yulab.utils_0.0.5 sass_0.4.1
## [58] stringi_1.7.6 RSQLite_2.2.15 highr_0.9
## [61] genefilter_1.76.0 BiocParallel_1.28.3 rlang_1.0.2
## [64] pkgconfig_2.0.3 bitops_1.0-7 evaluate_0.15
## [67] lattice_0.20-45 labeling_0.4.2 htmlwidgets_1.5.4
## [70] bit_4.0.4 tidyselect_1.1.2 magrittr_2.0.3
## [73] R6_2.5.1 generics_0.1.2 DelayedArray_0.20.0
## [76] DBI_1.1.2 pillar_1.7.0 haven_2.5.0
## [79] withr_2.5.0 survival_3.2-13 KEGGREST_1.34.0
## [82] abind_1.4-5 RCurl_1.98-1.6 modelr_0.1.8
## [85] crayon_1.5.1 car_3.0-13 utf8_1.2.2
## [88] tzdb_0.3.0 rmarkdown_2.14 locfit_1.5-9.6
## [91] grid_4.1.3 data.table_1.14.2 blob_1.2.3
## [94] reprex_2.0.1 digest_0.6.29 xtable_1.8-4
## [97] gridGraphics_0.5-1 munsell_0.5.0 viridisLite_0.4.0
## [100] bslib_0.3.1